Imports¶
In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize
from scipy.stats import bernoulli
from scipy.special import expit as sigmoid
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import ConstantKernel, RBF
Load data¶
In [2]:
TRAIN = pd.read_csv('data/preprocessed_train.csv')
VAL = pd.read_csv('data/preprocessed_val.csv')
TEST = pd.read_csv('data/preprocessed_test.csv')
Implementation¶
In [3]:
import numpy as np
from scipy.optimize import minimize
from scipy.linalg import cholesky, solve_triangular
from scipy.special import expit
def rbf_kernel(X1, X2, length_scale=1.0):
sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)
return np.exp(-0.5 * sqdist / length_scale**2)
class GaussianProcessClassifier:
def __init__(self, kernel=rbf_kernel, length_scale=1.0, noise=1e-6):
self.kernel = kernel
self.length_scale = length_scale
self.noise = noise
def fit(self, X, y):
self.X_train = X
self.y_train = y
K = self.kernel(X, X, self.length_scale) + self.noise * np.eye(len(X))
self.L_ = cholesky(K, lower=True)
self.alpha_ = np.zeros_like(y, dtype=float)
def objective(alpha):
return 0.5 * np.dot(alpha.T, np.dot(K, alpha)) - np.sum(expit(y * alpha))
def grad(alpha):
return np.dot(K, alpha) - y * expit(-y * alpha)
result = minimize(objective, self.alpha_, jac=grad, method='L-BFGS-B')
self.alpha_ = result.x
def predict_proba(self, X):
K_trans = self.kernel(X, self.X_train, self.length_scale)
f_star = np.dot(K_trans, self.alpha_)
v = solve_triangular(self.L_, K_trans.T, lower=True)
var_f_star = np.diag(self.kernel(X, X, self.length_scale)) - np.sum(v**2, axis=0)
proba = expit(f_star / np.sqrt(1 + np.pi * var_f_star / 8))
return proba
def predict(self, X):
return np.sign(self.predict_proba(X) - 0.5)
class MultiClassGaussianProcessClassifier:
"""
Implements OVR classification using binary GP classifiers.
"""
def __init__(self, kernel=rbf_kernel, length_scale=1.0, noise=1e-6):
self.kernel = kernel
self.length_scale = length_scale
self.noise = noise
self.classifiers = {}
def fit(self, X, y):
self.classes_ = np.unique(y)
for cls in self.classes_:
y_binary = np.where(y == cls, 1, -1)
gpc = GaussianProcessClassifier(kernel=self.kernel, length_scale=self.length_scale, noise=self.noise)
gpc.fit(X, y_binary)
self.classifiers[cls] = gpc
def predict_proba(self, X):
proba = np.zeros((X.shape[0], len(self.classes_)))
for idx, cls in enumerate(self.classes_):
proba[:, idx] = self.classifiers[cls].predict_proba(X)
proba = np.clip(proba, 1e-10, 1-1e-10)
proba /= proba.sum(axis=1, keepdims=True)
return proba
def predict(self, X):
proba = self.predict_proba(X)
return self.classes_[np.argmax(proba, axis=1)]
Experiments¶
Accuracy sanity check¶
In [4]:
X_train = TRAIN.drop(columns='y').to_numpy()
y_train = TRAIN.y.to_numpy()
X_test = VAL.drop(columns='y').to_numpy()
y_test = VAL.y.to_numpy()
multi_gpc = MultiClassGaussianProcessClassifier(length_scale=1.0, noise=1e-6)
multi_gpc.fit(X_train, y_train)
y_pred = multi_gpc.predict(X_test)
(y_pred == y_test).mean()
Out[4]:
0.6498422712933754
Default params¶
In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import label_binarize
def evaluate_model(X_train, y_train, X_test, y_test, num_runs=20):
accuracies = []
f1_scores = []
precisions = []
recalls = []
aucs = []
for _ in range(num_runs):
multi_gpc = MultiClassGaussianProcessClassifier(length_scale=1.0, noise=1e-6)
multi_gpc.fit(X_train, y_train)
y_pred = multi_gpc.predict(X_test)
accuracies.append(accuracy_score(y_test, y_pred))
f1_scores.append(f1_score(y_test, y_pred, average='weighted'))
precisions.append(precision_score(y_test, y_pred, average='weighted'))
recalls.append(recall_score(y_test, y_pred, average='weighted'))
y_test_bin = label_binarize(y_test, classes=np.unique(y_train))
y_pred_proba = multi_gpc.predict_proba(X_test)
aucs.append(roc_auc_score(y_test_bin, y_pred_proba, average='weighted', multi_class='ovr'))
print(f"Accuracy: {np.mean(accuracies):.3f} ± {np.std(accuracies):.3f}")
print(f"F1 Score: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")
print(f"Precision: {np.mean(precisions):.3f} ± {np.std(precisions):.3f}")
print(f"Recall: {np.mean(recalls):.3f} ± {np.std(recalls):.3f}")
print(f"AUC: {np.mean(aucs):.3f} ± {np.std(aucs):.3f}")
fig, ax = plt.subplots(1, 5, figsize=(20, 4))
metrics = [accuracies, f1_scores, precisions, recalls, aucs]
titles = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC']
for i in range(5):
ax[i].boxplot(metrics[i])
ax[i].set_title(titles[i])
ax[i].set_xticks([1])
ax[i].set_xticklabels([titles[i]])
plt.tight_layout()
plt.show()
Scale param vs metrics (noise fixed)¶
In [6]:
import itertools
def fixed_noise(X_train, y_train, X_test, y_test, length_scales, noise=1e-6, num_runs=1):
results = []
for length_scale in length_scales:
accuracies = []
f1_scores = []
precisions = []
recalls = []
aucs = []
for _ in range(num_runs):
multi_gpc = MultiClassGaussianProcessClassifier(length_scale=length_scale, noise=noise)
multi_gpc.fit(X_train, y_train)
y_pred = multi_gpc.predict(X_test)
accuracies.append(accuracy_score(y_test, y_pred))
f1_scores.append(f1_score(y_test, y_pred, average='weighted'))
precisions.append(precision_score(y_test, y_pred, average='weighted'))
recalls.append(recall_score(y_test, y_pred, average='weighted'))
y_test_bin = label_binarize(y_test, classes=np.unique(y_train))
y_pred_proba = multi_gpc.predict_proba(X_test)
aucs.append(roc_auc_score(y_test_bin, y_pred_proba, average='weighted', multi_class='ovr'))
results.append({
'length_scale': length_scale,
'accuracy_mean': np.mean(accuracies),
'accuracy_std': np.std(accuracies),
'f1_mean': np.mean(f1_scores),
'f1_std': np.std(f1_scores),
'precision_mean': np.mean(precisions),
'precision_std': np.std(precisions),
'recall_mean': np.mean(recalls),
'recall_std': np.std(recalls),
'auc_mean': np.mean(aucs),
'auc_std': np.std(aucs)
})
return results
# Plotting function
def plot_results(results):
fig, ax = plt.subplots(2, 3, figsize=(18, 12))
metrics = ['accuracy', 'f1', 'precision', 'recall', 'auc']
titles = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC']
for i, metric in enumerate(metrics):
metric_mean = [res[f'{metric}_mean'] for res in results]
metric_std = [res[f'{metric}_std'] for res in results]
ax[i // 3, i % 3].plot(range(len(results)), metric_mean, marker='o', label=f'{titles[i]} Mean')
ax[i // 3, i % 3].fill_between(range(len(results)),
np.array(metric_mean) - np.array(metric_std),
np.array(metric_mean) + np.array(metric_std),
alpha=0.2, label=f'{titles[i]} Std')
ax[i // 3, i % 3].set_title(titles[i])
ax[i // 3, i % 3].set_xticks(range(len(results)))
ax[i // 3, i % 3].set_xticklabels([f"LS: {res['length_scale']}" for res in results], rotation=45, ha="right")
ax[i // 3, i % 3].legend()
ax[1,2].set_visible(False)
plt.tight_layout()
plt.show()
In [7]:
length_scales = [0.1, 0.2, 0.5, 1.0, 5.0, 10.0]
fixed_noise_val = 1e-6
results = fixed_noise(X_train, y_train, X_test, y_test, length_scales, noise=fixed_noise_val)
/home/wiktorsadowy/Desktop/Labs/PGM/project-obesity_estimation_habits/venv/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1517: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
In [8]:
plot_results(results)
Noise param vs metrics (scale fixed)¶
In [9]:
def fixed_scale(X_train, y_train, X_test, y_test, noises, length_scale=1.0, num_runs=1):
results = []
for noise in noises:
accuracies = []
f1_scores = []
precisions = []
recalls = []
aucs = []
for _ in range(num_runs):
multi_gpc = MultiClassGaussianProcessClassifier(length_scale=length_scale, noise=noise)
multi_gpc.fit(X_train, y_train)
y_pred = multi_gpc.predict(X_test)
accuracies.append(accuracy_score(y_test, y_pred))
f1_scores.append(f1_score(y_test, y_pred, average='weighted'))
precisions.append(precision_score(y_test, y_pred, average='weighted'))
recalls.append(recall_score(y_test, y_pred, average='weighted'))
y_test_bin = label_binarize(y_test, classes=np.unique(y_train))
y_pred_proba = multi_gpc.predict_proba(X_test)
aucs.append(roc_auc_score(y_test_bin, y_pred_proba, average='weighted', multi_class='ovr'))
results.append({
'noise': noise,
'accuracy_mean': np.mean(accuracies),
'accuracy_std': np.std(accuracies),
'f1_mean': np.mean(f1_scores),
'f1_std': np.std(f1_scores),
'precision_mean': np.mean(precisions),
'precision_std': np.std(precisions),
'recall_mean': np.mean(recalls),
'recall_std': np.std(recalls),
'auc_mean': np.mean(aucs),
'auc_std': np.std(aucs)
})
return results
In [10]:
length_scale = 1.0
noises = [1e-3,2e-3,3e-3, 4e-3,5e-3,6e-3,7e-3,8e-3, 1e-4, 5e-4,1e-5, 5e-5, 1e-6]
results = fixed_scale(X_train, y_train, X_test, y_test, noises, length_scale=length_scale)
In [11]:
# Plotting function
def plot_nresults(results):
fig, ax = plt.subplots(2, 3, figsize=(18, 12))
metrics = ['accuracy', 'f1', 'precision', 'recall', 'auc']
titles = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC']
for i, metric in enumerate(metrics):
metric_mean = [res[f'{metric}_mean'] for res in results]
metric_std = [res[f'{metric}_std'] for res in results]
ax[i // 3, i % 3].plot(range(len(results)), metric_mean, marker='o', label=f'{titles[i]} Mean')
ax[i // 3, i % 3].fill_between(range(len(results)),
np.array(metric_mean) - np.array(metric_std),
np.array(metric_mean) + np.array(metric_std),
alpha=0.2, label=f'{titles[i]} Std')
ax[i // 3, i % 3].set_title(titles[i])
ax[i // 3, i % 3].set_xticks(range(len(results)))
ax[i // 3, i % 3].set_xticklabels([f"noise: {res['noise']}" for res in results], rotation=45, ha="right")
ax[i // 3, i % 3].legend()
ax[1,2].set_visible(False)
plt.tight_layout()
plt.show()
In [12]:
plot_nresults(results)
Hyperparam tuning with Optuna¶
In [13]:
import optuna
In [14]:
from sklearn.metrics import f1_score
def objective(trial):
param = {
'length_scale': trial.suggest_float('length_scale', 0.1, 15),
'noise': trial.suggest_float('noise', 1e-6, 1e-3)
}
multi_gpc = MultiClassGaussianProcessClassifier(length_scale=param['length_scale'], noise=param['noise'])
multi_gpc.fit(X_train, y_train)
y_pred = multi_gpc.predict(X_test) # to walidacyjny tak naprawde
f1 = f1_score(y_test, y_pred, average='weighted')
return f1
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
best_trial = study.best_trial
[I 2024-06-23 02:03:44,880] A new study created in memory with name: no-name-28ec279b-f38c-4849-ad89-cf68d3ff1e1b
[I 2024-06-23 02:03:47,079] Trial 0 finished with value: 0.6845929539008032 and parameters: {'length_scale': 4.611536992057886, 'noise': 0.000952948459085394}. Best is trial 0 with value: 0.6845929539008032.
[I 2024-06-23 02:03:49,736] Trial 1 finished with value: 0.6887354999749351 and parameters: {'length_scale': 5.752983950461434, 'noise': 0.0006817955110651472}. Best is trial 1 with value: 0.6887354999749351.
[I 2024-06-23 02:03:53,588] Trial 2 finished with value: 0.6358283457274838 and parameters: {'length_scale': 1.5059730649315877, 'noise': 0.00020313116876976027}. Best is trial 1 with value: 0.6887354999749351.
[I 2024-06-23 02:03:56,683] Trial 3 finished with value: 0.6385495948844573 and parameters: {'length_scale': 1.964726749082854, 'noise': 0.00041088512447725413}. Best is trial 1 with value: 0.6887354999749351.
[I 2024-06-23 02:04:00,491] Trial 4 finished with value: 0.6745611558783597 and parameters: {'length_scale': 4.166465970304931, 'noise': 0.00037541063630730574}. Best is trial 1 with value: 0.6887354999749351.
[I 2024-06-23 02:04:06,256] Trial 5 finished with value: 0.6565159864431629 and parameters: {'length_scale': 12.906746659526393, 'noise': 0.0007381105517303227}. Best is trial 1 with value: 0.6887354999749351.
[I 2024-06-23 02:04:08,750] Trial 6 finished with value: 0.6609526396643057 and parameters: {'length_scale': 7.134971111780753, 'noise': 0.00041460704445227134}. Best is trial 1 with value: 0.6887354999749351.
[I 2024-06-23 02:04:13,713] Trial 7 finished with value: 0.7042678021529678 and parameters: {'length_scale': 10.243958960308081, 'noise': 0.0009476406710073392}. Best is trial 7 with value: 0.7042678021529678.
[I 2024-06-23 02:04:16,864] Trial 8 finished with value: 0.6348102419391778 and parameters: {'length_scale': 14.901163975452882, 'noise': 3.323375800234764e-05}. Best is trial 7 with value: 0.7042678021529678.
[I 2024-06-23 02:04:19,277] Trial 9 finished with value: 0.6640294059870975 and parameters: {'length_scale': 3.535293281701322, 'noise': 0.0006602899528196022}. Best is trial 7 with value: 0.7042678021529678.
[I 2024-06-23 02:04:23,085] Trial 10 finished with value: 0.71414598148484 and parameters: {'length_scale': 10.213139367057337, 'noise': 0.0009393189009335356}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:26,487] Trial 11 finished with value: 0.6874581905528677 and parameters: {'length_scale': 10.109536130363505, 'noise': 0.0009290742453592621}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:32,831] Trial 12 finished with value: 0.7093041020973584 and parameters: {'length_scale': 10.330553044857313, 'noise': 0.0008342467007795757}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:36,566] Trial 13 finished with value: 0.6968782187295041 and parameters: {'length_scale': 9.683455899664239, 'noise': 0.0008384952756487626}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:41,920] Trial 14 finished with value: 0.7103544315788611 and parameters: {'length_scale': 12.1824174168448, 'noise': 0.0005963239118806788}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:46,119] Trial 15 finished with value: 0.6923475804899197 and parameters: {'length_scale': 12.870876475133889, 'noise': 0.0005358287407471893}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:49,747] Trial 16 finished with value: 0.653572683806264 and parameters: {'length_scale': 12.589197378627896, 'noise': 0.0005676897808119632}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:52,928] Trial 17 finished with value: 0.6908864443170488 and parameters: {'length_scale': 8.252690750232835, 'noise': 0.00025646895124118146}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:04:57,878] Trial 18 finished with value: 0.6799114936853856 and parameters: {'length_scale': 12.138145962729933, 'noise': 0.0007937068394790643}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:05:01,355] Trial 19 finished with value: 0.6530094692413292 and parameters: {'length_scale': 14.661483557600276, 'noise': 0.0005932397929222147}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:05:03,902] Trial 20 finished with value: 0.6818082842657415 and parameters: {'length_scale': 8.29083734638308, 'noise': 0.0002475118909553889}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:05:07,250] Trial 21 finished with value: 0.7047665061511472 and parameters: {'length_scale': 11.126323013077165, 'noise': 0.0008490253170193003}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:05:10,783] Trial 22 finished with value: 0.6256393258317936 and parameters: {'length_scale': 11.41974790924251, 'noise': 0.0008598892813256949}. Best is trial 10 with value: 0.71414598148484.
[I 2024-06-23 02:05:14,327] Trial 23 finished with value: 0.7227921271787708 and parameters: {'length_scale': 9.292137392223605, 'noise': 0.0009966791185139952}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:18,669] Trial 24 finished with value: 0.7048581273207862 and parameters: {'length_scale': 8.823738194011977, 'noise': 0.0009974143063452008}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:21,758] Trial 25 finished with value: 0.6458640043063653 and parameters: {'length_scale': 14.079060920350638, 'noise': 0.0007369234814513301}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:24,642] Trial 26 finished with value: 0.6805731159737259 and parameters: {'length_scale': 6.299590672517915, 'noise': 0.0009069916254296417}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:28,116] Trial 27 finished with value: 0.7039881040402258 and parameters: {'length_scale': 9.109413486316598, 'noise': 0.0004713422415398581}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:30,979] Trial 28 finished with value: 0.6250296882914866 and parameters: {'length_scale': 13.459627334463311, 'noise': 0.0006347393863844094}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:34,626] Trial 29 finished with value: 0.6820009968308309 and parameters: {'length_scale': 11.553090679286466, 'noise': 0.0009697588920865407}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:37,203] Trial 30 finished with value: 0.703557306535188 and parameters: {'length_scale': 6.848804570755373, 'noise': 0.0007479080555016686}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:40,375] Trial 31 finished with value: 0.7167333794640685 and parameters: {'length_scale': 10.427963364043919, 'noise': 0.0008976974914749867}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:43,175] Trial 32 finished with value: 0.7044687660825666 and parameters: {'length_scale': 10.905119040459262, 'noise': 0.0009095841490784758}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:47,825] Trial 33 finished with value: 0.698459732047161 and parameters: {'length_scale': 7.934956754370662, 'noise': 0.0007814612140370951}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:51,502] Trial 34 finished with value: 0.7143469061428178 and parameters: {'length_scale': 9.431576846274426, 'noise': 0.0009728009023703586}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:55,164] Trial 35 finished with value: 0.7134573105492341 and parameters: {'length_scale': 9.434133553154329, 'noise': 0.000999474354185778}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:05:57,910] Trial 36 finished with value: 0.6985455701275298 and parameters: {'length_scale': 6.067738513739033, 'noise': 0.0008867069325521461}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:01,067] Trial 37 finished with value: 0.688088426365455 and parameters: {'length_scale': 4.984021707944461, 'noise': 0.0009500510696019484}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:04,387] Trial 38 finished with value: 0.6973973141635893 and parameters: {'length_scale': 7.121068513314034, 'noise': 0.0006952321345564625}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:08,772] Trial 39 finished with value: 0.591674196433422 and parameters: {'length_scale': 0.5262736545269959, 'noise': 0.0008164876262084029}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:11,737] Trial 40 finished with value: 0.7072140771155303 and parameters: {'length_scale': 8.625277171468332, 'noise': 0.0008877048219217033}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:16,604] Trial 41 finished with value: 0.6801164249819195 and parameters: {'length_scale': 9.992834548226169, 'noise': 0.0009984394237631954}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:19,421] Trial 42 finished with value: 0.6948469945163177 and parameters: {'length_scale': 9.334691701169438, 'noise': 0.0009505950793335996}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:22,641] Trial 43 finished with value: 0.6641838176469255 and parameters: {'length_scale': 10.438571614904754, 'noise': 0.0009946625090571464}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:25,935] Trial 44 finished with value: 0.683269808720048 and parameters: {'length_scale': 7.634390655190989, 'noise': 0.0009235265115079443}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:28,639] Trial 45 finished with value: 0.7040439358674444 and parameters: {'length_scale': 9.361029974706662, 'noise': 0.0008792521141986145}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:32,043] Trial 46 finished with value: 0.7088840227375796 and parameters: {'length_scale': 10.699375454363597, 'noise': 0.00010913347260326813}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:35,816] Trial 47 finished with value: 0.6413560736056952 and parameters: {'length_scale': 11.856644143944095, 'noise': 0.0009502134997578169}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:38,429] Trial 48 finished with value: 0.6693752704763964 and parameters: {'length_scale': 9.672465315929585, 'noise': 0.0003468166878765967}. Best is trial 23 with value: 0.7227921271787708.
[I 2024-06-23 02:06:41,778] Trial 49 finished with value: 0.6702046524527897 and parameters: {'length_scale': 8.740153428255411, 'noise': 0.0007780164626071922}. Best is trial 23 with value: 0.7227921271787708.
In [15]:
best_trial
Out[15]:
FrozenTrial(number=23, state=1, values=[0.7227921271787708], datetime_start=datetime.datetime(2024, 6, 23, 2, 5, 10, 789558), datetime_complete=datetime.datetime(2024, 6, 23, 2, 5, 14, 327692), params={'length_scale': 9.292137392223605, 'noise': 0.0009966791185139952}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'length_scale': FloatDistribution(high=15.0, log=False, low=0.1, step=None), 'noise': FloatDistribution(high=0.001, log=False, low=1e-06, step=None)}, trial_id=23, value=None)
In [16]:
# Use only interactively, will be empty in Jupyter Lab due to optuna errors
def visualize_optuna(study):
import optuna.visualization as vis
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()
optuna.visualization.plot_slice(study).show()
optuna.visualization.plot_parallel_coordinate(study).show()
In [17]:
visualize_optuna(study)
Final eval¶
In [18]:
X_train = TRAIN.drop(columns='y').to_numpy()
y_train = TRAIN.y.to_numpy()
X_val = VAL.drop(columns='y').to_numpy()
y_val = VAL.y.to_numpy()
X_test = TEST.drop(columns='y').to_numpy()
y_test = TEST.y.to_numpy()
X_train_combined = np.concatenate((X_train, X_val), axis=0)
y_train_combined = np.concatenate((y_train, y_val), axis=0)
In [19]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import label_binarize
def evaluate_final_model(X_train, y_train, X_test, y_test, num_runs=20):
accuracies = []
f1_scores = []
precisions = []
recalls = []
aucs = []
for _ in range(num_runs):
multi_gpc = MultiClassGaussianProcessClassifier(length_scale=0.9370589034485247, noise=0.0007533926368261847)
multi_gpc.fit(X_train, y_train)
y_pred = multi_gpc.predict(X_test)
accuracies.append(accuracy_score(y_test, y_pred))
f1_scores.append(f1_score(y_test, y_pred, average='weighted'))
precisions.append(precision_score(y_test, y_pred, average='weighted'))
recalls.append(recall_score(y_test, y_pred, average='weighted'))
y_test_bin = label_binarize(y_test, classes=np.unique(y_train))
y_pred_proba = multi_gpc.predict_proba(X_test)
aucs.append(roc_auc_score(y_test_bin, y_pred_proba, average='weighted', multi_class='ovr'))
print(f"Accuracy: {np.mean(accuracies):.3f} ± {np.std(accuracies):.3f}")
print(f"F1 Score: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")
print(f"Precision: {np.mean(precisions):.3f} ± {np.std(precisions):.3f}")
print(f"Recall: {np.mean(recalls):.3f} ± {np.std(recalls):.3f}")
print(f"AUC: {np.mean(aucs):.3f} ± {np.std(aucs):.3f}")
fig, ax = plt.subplots(1, 5, figsize=(20, 4))
metrics = [accuracies, f1_scores, precisions, recalls, aucs]
titles = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC']
for i in range(5):
ax[i].boxplot(metrics[i])
ax[i].set_title(titles[i])
ax[i].set_xticks([1])
ax[i].set_xticklabels([titles[i]])
plt.tight_layout()
plt.show()
In [20]:
evaluate_final_model(X_train_combined, y_train_combined, X_test, y_test)
Accuracy: 0.719 ± 0.000 F1 Score: 0.714 ± 0.000 Precision: 0.713 ± 0.000 Recall: 0.719 ± 0.000 AUC: 0.905 ± 0.000